// This file is made available under Elastic License 2.0.
// This file is based on code available under the Apache license here:
// https://github.com/apache/orc/tree/main/tools/test/TestFileMetadata.cc

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Adaptor.hh"
#include "ToolTest.hh"
#include "orc/OrcFile.hh"
#include "wrap/gmock.h"
#include "wrap/gtest-wrapper.h"

// Runs orc-metadata on orc_split_elim.orc with both spellings of the raw
// option ("-r" and "--raw"). For each spelling the test expects runProgram
// to return 0, the output string to equal the expected raw file tail and the
// error string to be empty.
TEST(TestFileMetadata, testRaw) {
    const std::string tool = findProgram("tools/src/orc-metadata");
    const std::string orcFile = findExample("orc_split_elim.orc");

    // The raw literal begins with the newline that ends the "Raw file tail"
    // header line and finishes with the newline after the last field.
    const std::string wanted = "Raw file tail: " + orcFile + R"txt(
postscript {
  footerLength: 288
  compression: NONE
  version: 0
  version: 12
  metadataLength: 526
  magic: "ORC"
}
footer {
  headerLength: 3
  contentLength: 245568
  stripes {
    offset: 3
    indexLength: 137
    dataLength: 45282
    footerLength: 149
    numberOfRows: 5000
  }
  stripes {
    offset: 45571
    indexLength: 137
    dataLength: 45282
    footerLength: 149
    numberOfRows: 5000
  }
  stripes {
    offset: 91139
    indexLength: 137
    dataLength: 45282
    footerLength: 149
    numberOfRows: 5000
  }
  stripes {
    offset: 136707
    indexLength: 138
    dataLength: 45283
    footerLength: 149
    numberOfRows: 5000
  }
  stripes {
    offset: 200000
    indexLength: 137
    dataLength: 45282
    footerLength: 149
    numberOfRows: 5000
  }
  types {
    kind: STRUCT
    subtypes: 1
    subtypes: 2
    subtypes: 3
    subtypes: 4
    subtypes: 5
    fieldNames: "userid"
    fieldNames: "string1"
    fieldNames: "subtype"
    fieldNames: "decimal1"
    fieldNames: "ts"
  }
  types {
    kind: LONG
  }
  types {
    kind: STRING
  }
  types {
    kind: DOUBLE
  }
  types {
    kind: DECIMAL
  }
  types {
    kind: TIMESTAMP
  }
  numberOfRows: 25000
  statistics {
    numberOfValues: 25000
  }
  statistics {
    numberOfValues: 25000
    intStatistics {
      minimum: 2
      maximum: 100
      sum: 2499619
    }
  }
  statistics {
    numberOfValues: 25000
    stringStatistics {
      minimum: "bar"
      maximum: "zebra"
      sum: 124990
    }
  }
  statistics {
    numberOfValues: 25000
    doubleStatistics {
      minimum: 0.8
      maximum: 80
      sum: 200051.40000000002
    }
  }
  statistics {
    numberOfValues: 25000
    decimalStatistics {
      minimum: "0"
      maximum: "5.5"
      sum: "16.6"
    }
  }
  statistics {
    numberOfValues: 25000
  }
  rowIndexStride: 10000
}
fileLength: 246402
postscriptLength: 19
)txt";

    const std::string shortFlag("-r");
    const std::string longFlag("--raw");

    // The same pair of strings is handed to both invocations, as before.
    std::string out;
    std::string err;

    // Short option spelling.
    EXPECT_EQ(0, runProgram({tool, shortFlag, orcFile}, out, err));
    EXPECT_EQ(wanted, out);
    EXPECT_EQ("", err);

    // Long option spelling must yield the same result.
    EXPECT_EQ(0, runProgram({tool, longFlag, orcFile}, out, err));
    EXPECT_EQ(wanted, out);
    EXPECT_EQ("", err);
}

// Runs orc-metadata without options on orc_split_elim.orc. The test expects
// runProgram to return 0, the output string to equal the JSON summary below
// (type, row count, five stripes, lengths) and the error string to be empty.
TEST(TestFileMetadata, testJson) {
    const std::string tool = findProgram("tools/src/orc-metadata");
    const std::string orcFile = findExample("orc_split_elim.orc");

    // The raw literal starts right after the file name, i.e. with the closing
    // quote and comma of the "name" entry.
    const std::string wanted = "{ \"name\": \"" + orcFile + R"json(",
  "type": "struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(0,0),ts:timestamp>",
  "attributes": {},
  "rows": 25000,
  "stripe count": 5,
  "format": "0.12", "writer version": "original",
  "compression": "none",
  "file length": 246402,
  "content": 245568, "stripe stats": 526, "footer": 288, "postscript": 19,
  "row index stride": 10000,
  "user metadata": {
  },
  "stripes": [
    { "stripe": 0, "rows": 5000,
      "offset": 3, "length": 45568,
      "index": 137, "data": 45282, "footer": 149
    },
    { "stripe": 1, "rows": 5000,
      "offset": 45571, "length": 45568,
      "index": 137, "data": 45282, "footer": 149
    },
    { "stripe": 2, "rows": 5000,
      "offset": 91139, "length": 45568,
      "index": 137, "data": 45282, "footer": 149
    },
    { "stripe": 3, "rows": 5000,
      "offset": 136707, "length": 45570,
      "index": 138, "data": 45283, "footer": 149
    },
    { "stripe": 4, "rows": 5000,
      "offset": 200000, "length": 45568,
      "index": 137, "data": 45282, "footer": 149
    }
  ]
}
)json";

    std::string out;
    std::string err;

    EXPECT_EQ(0, runProgram({tool, orcFile}, out, err));
    EXPECT_EQ(wanted, out);
    EXPECT_EQ("", err);
}

// Runs orc-metadata without options on orc_no_format.orc. The test expects
// runProgram to return 0, the output string to equal the JSON summary below
// (which lists format "0.11" and a single stripe) and the error string to be
// empty.
TEST(TestFileMetadata, testNoFormat) {
    const std::string pgm = findProgram("tools/src/orc-metadata");
    const std::string file = findExample("orc_no_format.orc");
    const std::string expected =
            "{ \"name\": \"" + file +
            "\",\n"
            "  \"type\": "
            "\"struct<_col0:array<string>,_col1:map<int,string>,_col2:struct<name:string,score:int>"
            ">\",\n"
            "  \"attributes\": {},\n"
            "  \"rows\": 5,\n"
            "  \"stripe count\": 1,\n"
            "  \"format\": \"0.11\", \"writer version\": \"original\",\n"
            "  \"compression\": \"zlib\", \"compression block\": 262144,\n"
            "  \"file length\": 745,\n"
            "  \"content\": 525, \"stripe stats\": 0, \"footer\": 210, \"postscript\": 9,\n"
            "  \"row index stride\": 10000,\n"
            "  \"user metadata\": {\n"
            "  },\n"
            "  \"stripes\": [\n"
            "    { \"stripe\": 0, \"rows\": 5,\n"
            "      \"offset\": 3, \"length\": 522,\n"
            "      \"index\": 224, \"data\": 187, \"footer\": 111\n"
            "    }\n"
            "  ]\n"
            "}\n";

    std::string output;
    std::string error;

    // EXPECT_EQ prints both operands when they differ, so the expected text
    // is not echoed to stdout on every run.
    EXPECT_EQ(0, runProgram({pgm, file}, output, error));
    EXPECT_EQ(expected, output);
    EXPECT_EQ("", error);
}

// Runs orc-metadata without options on complextypes_iceberg.orc. The test
// expects runProgram to return 0, the output string to equal the JSON summary
// below and the error string to be empty. Unlike the other JSON tests in this
// file, the expected "attributes" object is non-empty: it carries one entry
// per (nested) column path with its iceberg.* key/value pairs.
TEST(TestFileMetadata, testAttributes) {
    const std::string pgm = findProgram("tools/src/orc-metadata");
    const std::string file = findExample("complextypes_iceberg.orc");
    const std::string expected =
            "{ \"name\": \"" + file +
            "\",\n"
            "  \"type\": "
            "\"struct<id:bigint,int_array:array<int>,int_array_array:array<array<int>>,int_map:map<"
            "string,int>,int_map_array:array<map<string,int>>,nested_struct:struct<a:int,b:array<"
            "int>,c:struct<d:array<array<struct<e:int,f:string>>>>,g:map<string,struct<h:struct<i:"
            "array<double>>>>>>\",\n"
            "  \"attributes\": {\n"
            "    \"id\": {\"iceberg.id\": \"1\", \"iceberg.long-type\": \"LONG\", "
            "\"iceberg.required\": \"false\"},\n"
            "    \"int_array\": {\"iceberg.id\": \"2\", \"iceberg.required\": \"false\"},\n"
            "    \"int_array._elem\": {\"iceberg.id\": \"7\", \"iceberg.required\": \"false\"},\n"
            "    \"int_array_array\": {\"iceberg.id\": \"3\", \"iceberg.required\": \"false\"},\n"
            "    \"int_array_array._elem\": {\"iceberg.id\": \"8\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"int_array_array._elem._elem\": {\"iceberg.id\": \"9\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"int_map\": {\"iceberg.id\": \"4\", \"iceberg.required\": \"false\"},\n"
            "    \"int_map._key\": {\"iceberg.id\": \"10\", \"iceberg.required\": \"true\"},\n"
            "    \"int_map._value\": {\"iceberg.id\": \"11\", \"iceberg.required\": \"false\"},\n"
            "    \"int_map_array\": {\"iceberg.id\": \"5\", \"iceberg.required\": \"false\"},\n"
            "    \"int_map_array._elem\": {\"iceberg.id\": \"12\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"int_map_array._elem._key\": {\"iceberg.id\": \"13\", \"iceberg.required\": "
            "\"true\"},\n"
            "    \"int_map_array._elem._value\": {\"iceberg.id\": \"14\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct\": {\"iceberg.id\": \"6\", \"iceberg.required\": \"false\"},\n"
            "    \"nested_struct.a\": {\"iceberg.id\": \"15\", \"iceberg.required\": \"false\"},\n"
            "    \"nested_struct.b\": {\"iceberg.id\": \"16\", \"iceberg.required\": \"false\"},\n"
            "    \"nested_struct.b._elem\": {\"iceberg.id\": \"19\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct.c\": {\"iceberg.id\": \"17\", \"iceberg.required\": \"false\"},\n"
            "    \"nested_struct.c.d\": {\"iceberg.id\": \"20\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct.c.d._elem\": {\"iceberg.id\": \"21\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct.c.d._elem._elem\": {\"iceberg.id\": \"22\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct.c.d._elem._elem.e\": {\"iceberg.id\": \"23\", "
            "\"iceberg.required\": \"false\"},\n"
            "    \"nested_struct.c.d._elem._elem.f\": {\"iceberg.id\": \"24\", "
            "\"iceberg.required\": \"false\"},\n"
            "    \"nested_struct.g\": {\"iceberg.id\": \"18\", \"iceberg.required\": \"false\"},\n"
            "    \"nested_struct.g._key\": {\"iceberg.id\": \"25\", \"iceberg.required\": "
            "\"true\"},\n"
            "    \"nested_struct.g._value\": {\"iceberg.id\": \"26\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct.g._value.h\": {\"iceberg.id\": \"27\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct.g._value.h.i\": {\"iceberg.id\": \"28\", \"iceberg.required\": "
            "\"false\"},\n"
            "    \"nested_struct.g._value.h.i._elem\": {\"iceberg.id\": \"29\", "
            "\"iceberg.required\": \"false\"}},\n"
            "  \"rows\": 1,\n"
            "  \"stripe count\": 1,\n"
            "  \"format\": \"0.12\", \"writer version\": \"future - 9\",\n"
            "  \"compression\": \"zlib\", \"compression block\": 131072,\n"
            "  \"file length\": 1734,\n"
            "  \"content\": 1006, \"stripe stats\": 167, \"footer\": 535, \"postscript\": 25,\n"
            "  \"row index stride\": 10000,\n"
            "  \"user metadata\": {\n"
            "  },\n"
            "  \"stripes\": [\n"
            "    { \"stripe\": 0, \"rows\": 1,\n"
            "      \"offset\": 3, \"length\": 1003,\n"
            "      \"index\": 679, \"data\": 150, \"footer\": 174\n"
            "    }\n"
            "  ]\n"
            "}\n";

    std::string output;
    std::string error;

    // EXPECT_EQ prints both operands when they differ, so the expected text
    // is not echoed to stdout on every run.
    EXPECT_EQ(0, runProgram({pgm, file}, output, error));
    EXPECT_EQ(expected, output);
    EXPECT_EQ("", error);
}
